/*==============================================================================
案例2：房地产价格预测
文件: case02_house_price_prediction.do
==============================================================================*/

* Reset the session: drop data in memory, disable output paging, and close any
* log left open by an earlier run (capture: no error if none is open).
clear all
set more off
capture log close

* Set the working directory.
* The path below is specific to one machine. Instead of aborting the whole
* script when it does not exist, try it and fall back to the current working
* directory with an explicit notice.
local project_dir "/Users/mac/git/stata"
capture cd "`project_dir'"
if _rc {
    display as text "注意: 无法进入目录 `project_dir'，改用当前工作目录 `c(pwd)'"
}

* Create the output directories (capture: ignore the error if they exist)
capture mkdir output
capture mkdir output/figures

* Start the text log, overwriting any previous one
log using "output/case02_house_price.log", replace text

display "=========================================="
display "案例2：房地产价格预测"
display "=========================================="

/*------------------------------------------------------------------------------
Step 1: Data preparation
------------------------------------------------------------------------------*/

* Load Stata's bundled auto dataset
sysuse auto, clear

* Recast the car variables as stand-ins for housing attributes (simulated
* house-price data). A single grouped rename maps old names to new names
* position by position.
rename (price       weight     length   mpg               displacement gear_ratio) ///
       (house_price house_size lot_size energy_efficiency total_rooms  age)

* Rescale the values: three variables share the same divisor
foreach v of varlist house_price house_size lot_size {
    replace `v' = `v' / 10
}
replace total_rooms = total_rooms / 50
replace age = 3 * age

* Location indicator copied from the foreign dummy, with value labels
generate location = foreign
label define location_lbl 0 "郊区" 1 "市区"
label values location location_lbl

* Summary statistics for the main variables
summarize house_price house_size lot_size energy_efficiency age

/*------------------------------------------------------------------------------
Step 2: Simple linear regression
------------------------------------------------------------------------------*/

* Section banner (leading _newline prints the blank separator line)
display _newline "第二步：简单线性回归"
display "--------------------"

* Model 1: single predictor (size)
regress house_price house_size

* Report the slope and fit of the model just estimated
display _newline "面积对房价的影响：" ///
        _newline "  - 每增加1平方米，房价增加 " %5.2f _b[house_size] " 千元" ///
        _newline "  - R² = " %5.4f e(r2)

* Scatter plot with the fitted line overlaid; graph-level options are given
* after the final || separator
twoway scatter house_price house_size ///
    || lfit house_price house_size ///
    || , title("房价 vs 面积") ///
         xtitle("房屋面积（平方米）") ///
         ytitle("房价（千元）")
graph export "output/figures/case02_price_vs_size.png", replace width(1200)

/*------------------------------------------------------------------------------
Step 3: Multiple linear regression
------------------------------------------------------------------------------*/

* Section banner (leading _newline prints the blank separator line)
display _newline "第三步：多元线性回归"
display "--------------------"

* Model 2: size plus the remaining predictors, kept in a local for readability
local covariates lot_size energy_efficiency total_rooms age location
regress house_price house_size `covariates'

* Report the fit of the model just estimated
display _newline "多变量模型 R² = " %5.4f e(r2)

/*------------------------------------------------------------------------------
Step 4: Non-linear model
------------------------------------------------------------------------------*/

* Section banner (leading _newline prints the blank separator line)
display _newline "第四步：非线性模型（多项式回归）"
display "--------------------------------"

* Squared terms for the two size measures (reused later in the model comparison)
generate size_sq = house_size^2
generate lot_sq  = lot_size^2

* Polynomial regression: linear and squared size terms, then the controls
local size_terms house_size size_sq lot_size lot_sq
local controls   energy_efficiency total_rooms age location
regress house_price `size_terms' `controls'

* Report the fit of the model just estimated
display _newline "多项式模型 R² = " %5.4f e(r2)

/*------------------------------------------------------------------------------
Step 5: Interaction effect
------------------------------------------------------------------------------*/

display ""
display "第五步：交互效应"
display "----------------"

* Interaction term: size x location
gen size_location = house_size * location

* Model including the interaction term
regress house_price house_size size_location lot_size ///
    energy_efficiency total_rooms age location

* Derive the interpretation from the estimates instead of asserting it:
* take the interaction coefficient and its two-sided p-value from the t
* distribution with the residual degrees of freedom of the regression above.
scalar b_int = _b[size_location]
scalar p_int = 2 * ttail(e(df_r), abs(_b[size_location] / _se[size_location]))

display ""
display "交互效应解读："
display "  - 面积×位置 交互系数 = " %9.4f scalar(b_int) " (p = " %5.3f scalar(p_int) ")"
* A missing p-value (e.g. a dropped term) compares as larger than 0.05, so it
* falls through to the "no significant difference" branch.
if scalar(p_int) < 0.05 & scalar(b_int) > 0 {
    display "  - 市区房产的面积溢价更高"
}
else if scalar(p_int) < 0.05 & scalar(b_int) < 0 {
    display "  - 市区房产的面积溢价更低"
}
else {
    display "  - 市区与郊区的面积溢价无显著差异（5% 水平）"
}

/*------------------------------------------------------------------------------
Step 6: Model comparison
------------------------------------------------------------------------------*/

display ""
display "第六步：模型比较"
display "----------------"

* Regressor lists for the three models being compared:
*   1 = simple linear, 2 = multiple linear, 3 = polynomial
local spec1 house_size
local spec2 house_size lot_size energy_efficiency total_rooms age location
local spec3 house_size size_sq lot_size lot_sq ///
    energy_efficiency total_rooms age location

* Refit each model quietly and keep its fit statistics in scalars.
* The models differ in the number of regressors, and raw R² cannot decrease
* when regressors are added, so adjusted R² is stored alongside it.
forvalues m = 1/3 {
    quietly regress house_price `spec`m''
    scalar r2_`m'   = e(r2)
    scalar r2a_`m'  = e(r2_a)
    scalar rmse_`m' = e(rmse)
}

* Show the comparison (RMSE uses a 7-wide format so values in the hundreds
* still have room for two decimals)
display ""
display "模型性能比较："
display "  模型1（简单线性）: R² = " %5.4f r2_1 ", 调整R² = " %5.4f r2a_1 ", RMSE = " %7.2f rmse_1
display "  模型2（多元线性）: R² = " %5.4f r2_2 ", 调整R² = " %5.4f r2a_2 ", RMSE = " %7.2f rmse_2
display "  模型3（多项式）:   R² = " %5.4f r2_3 ", 调整R² = " %5.4f r2a_3 ", RMSE = " %7.2f rmse_3

/*------------------------------------------------------------------------------
Step 7: Management insights
------------------------------------------------------------------------------*/

* Section banner: blank line, framed title, blank line.
* Each display below ends with _newline to emit the trailing blank line.
display _newline "==========================================" ///
        _newline "管理洞察和建议" ///
        _newline "==========================================" ///
        _newline

* 1. Price drivers
display "1. 房价驱动因素" ///
        _newline "   ① 房屋面积 - 最重要的因素" ///
        _newline "   ② 位置 - 市区溢价显著" ///
        _newline "   ③ 能效评分 - 现代购房者重视" ///
        _newline "   ④ 房龄 - 负向影响" ///
        _newline

* 2. Pricing strategy
display "2. 定价策略" ///
        _newline "   - 基于面积的分级定价" ///
        _newline "   - 市区房产可以有更高溢价" ///
        _newline "   - 强调能效特性作为卖点" ///
        _newline "   - 老房子考虑适当折扣" ///
        _newline

* 3. Investment advice
display "3. 投资建议" ///
        _newline "   - 优先投资核心地段" ///
        _newline "   - 大户型房产增值潜力大" ///
        _newline "   - 能效改造可提升房价" ///
        _newline

* Close the log opened at the start of the script
log close

